#COMPILE EXE
#DIM ALL
#INCLUDE ONCE "Win32Api.inc"                                      ' Windows standard stuff

'----- Search-word tables shared by PBMAIN, AddWord and AddWordU.
'      Each entry is built as: keyword + "\" + one or more 6-char groups,
'      each group being a 3-digit topic number followed by a 3-digit hit count.
GLOBAL tWordCtr AS LONG                                           ' Number of entries in use in tWords()
GLOBAL uWordCtr AS LONG                                           ' Number of entries in use in uWords()
GLOBAL tWords() AS STRING                                         ' Words filed through AddWord
GLOBAL uWords() AS STRING                                         ' Words filed through AddWordU

FUNCTION PBMAIN () AS LONG
'----- Build HnDIndex.txt from HnD.pas, HnDMasterKW.txt and the exported help text files.
'      Records written, in this order:
'         T + nnn + category + title        one per topic
'         K + nnn + kwd,kwd,...             keywords taken from the topic ID
'         W + entry from tWords()           words found in the help text (sorted)
'         U + entry from uWords()           words found in the topic titles (sorted)
'      The function result is never assigned, so it returns 0.
LOCAL fnum1, fnum2, i, j, k, L1, L2, MstrKwdsCtr, MstrKwdsCtrU, FilesCtr, lctr, nTopics AS LONG, ltxt, t AS STRING
LOCAL tTopicID AS LONG, tPrefix, tTitle, helpDir AS STRING
LOCAL words() AS STRING                                           ' Sized per line with REDIM below
DIM fields(1 TO 100) AS STRING
DIM subfields(1 TO 100) AS STRING
DIM MstrKwds(1 TO 5000) AS STRING
DIM MstrKwdsU(1 TO 5000) AS STRING
DIM Files(1 TO 750) AS STRING
DIM Topics(0 TO 750) AS STRING
DIM TopicsKwds(0 TO 750) AS STRING
DIM TopicsCat(0 TO 750) AS STRING
DIM tWords(1 TO 10000) AS GLOBAL STRING
DIM uWords(1 TO 10000) AS GLOBAL STRING

   '----- Load the Master KW table
   ERRCLEAR                                                       ' Start with a clean error state
   fnum1 = FREEFILE                                               ' Get fnum
   OPEN "HnDMasterKW.txt" FOR INPUT ACCESS READ AS # fnum1        ' Open it up
   IF ERR THEN                                                    ' Open failed?
      MSGBOX "Can't open: HnDMasterKW.txt"                        ' Report and give up
      EXIT FUNCTION                                               '
   END IF                                                         '
   DO WHILE ISFALSE EOF(fnum1)                                    ' If not EOF
      LINE INPUT# fnum1, ltxt                                     ' Get a record
      IF UCASE$(ltxt) = ltxt THEN                                 ' All-uppercase Kwd: matched case-sensitively later
         INCR MstrKwdsCtrU                                        '
         MstrKwdsU(MstrKwdsCtrU) = TRIM$(ltxt)                    ' Save it
      ELSE                                                        ' Mixed case Kwd: matched ignoring case later
         INCR MstrKwdsCtr                                         '
         MstrKwds(MstrKwdsCtr) = UCASE$(TRIM$(ltxt))              ' Save it
      END IF                                                      '
   LOOP                                                           ' Loop back till file all read
   CLOSE fnum1                                                    ' Close it up

   '----- Now open the main files
   '      FREEFILE must be asked again only AFTER the first file is open,
   '      otherwise both variables can receive the same file number.
   ERRCLEAR                                                       '
   fnum1 = FREEFILE                                               ' Get input fnum
   OPEN "HnD.pas" FOR INPUT ACCESS READ AS # fnum1                '
   IF ERR THEN                                                    ' Open failed?
      MSGBOX "Can't open: HnD.pas"                                '
      EXIT FUNCTION                                               '
   END IF                                                         '
   fnum2 = FREEFILE                                               ' Get output fnum
   OPEN "HnDIndex.txt" FOR OUTPUT ACCESS WRITE AS # fnum2         '
   IF ERR THEN                                                    ' Open failed?
      CLOSE fnum1                                                 ' Don't leave the input open
      MSGBOX "Can't open: HnDIndex.txt"                           '
      EXIT FUNCTION                                               '
   END IF                                                         '

   '----- Process the file
   DO WHILE ISFALSE EOF(fnum1)                                    ' If not EOF
      LINE INPUT# fnum1, ltxt                                     ' Get a record
      ltxt = SHRINK$(ltxt)                                        ' Tidy it up
      IF LEFT$(ltxt, 5) <> "HELP_" THEN ITERATE DO                ' Ignore lines we don't want

      '----- Extract the full topic title
      i = INSTR(-2, ltxt, $DQ)                                    ' Find starting quote of Title
      j = INSTR(-1, ltxt, $DQ)                                    ' Find ending quote of Title
      tTitle = MID$(ltxt, i + 1 TO j - 1)                         ' Separate Title
      PARSE ltxt, fields(), " "                                   ' Split apart by spaces
      tTopicID = VAL(fields(3))                                   ' Get topic ID
      IF tTopicID < LBOUND(Topics()) OR tTopicID > UBOUND(Topics()) THEN ITERATE DO ' ID won't fit the tables
      Topics(tTopicID) = tTitle                                   ' Save in topic table

      '----- Extract the keywords from the Topic ID
      tPrefix = MID$(fields(1), 6)                                ' Get prefix from 1st 'word'

      '----- Convert trailing _ to $ since HnD won't allow $ in a Topic ID
      IF RIGHT$(tPrefix, 1) = "_" THEN                            ' Trailing _ ?
         tPrefix = CLIP$(RIGHT, tPrefix, 1) + "$"                 ' Make it a $
      END IF

      j = PARSECOUNT(tPrefix, "_")                                ' Get count of subfields
      PARSE tPrefix, subfields(), "_"                             ' Separate the Topic ID
      TopicsCat(tTopicID) = subfields(1)                          ' Save Category

      '----- Handle words with embedded _ chars
      '      An empty subfield means two adjacent _ in the ID, i.e. a keyword that
      '      itself contains an underscore; the pieces are glued back together.
      i = 2                                                       ' Set starting 'word'
      NextWord:
      t = UCASE$(subfields(i))                                    ' Get uppercase version
      Nextappend:
      IF i + 1 > j THEN                                           ' No following subfield?
         TopicsKwds(tTopicID) += t + ","                          ' Add to Kwds string
         GOTO TopicDone
      END IF
      IF ISNOTNULL(subfields(i)) AND ISNOTNULL(subfields(i + 1)) THEN ' A simple word
         TopicsKwds(tTopicID) += t + ","                          ' Add to Kwds string
         INCR i                                                   ' bump index
         IF i <= j THEN GOTO NextWord                             ' And loop back

      ELSE                                                        ' Else an embedded _
         t += "_" + UCASE$(subfields(i + 2))                      ' Append _ and next word
         i += 2                                                   '
         IF i < j THEN GOTO NextAppend                            ' Loop back
         TopicsKwds(tTopicID) += t + ","                          ' Add to Kwds string
      END IF
      TopicDone:                                                  '

      '----- Extract search words from the topic title
      CALL AddTitleWords(tTopicID, tTitle, " ")                   ' Once split on blanks
      CALL AddTitleWords(tTopicID, tTitle, "_")                   ' And once split on underscores

   LOOP                                                           ' Loop back till file all read
   CLOSE fnum1                                                    ' Done with HnD.pas

   '----- Write the Topic IDs out
   FOR i = 0 TO 750                                               ' Dump Topic Titles
      IF ISNULL(Topics(i)) THEN ITERATE FOR                       ' Ignore null entries
      t = "T" + FORMAT$(i, "000") + TopicsCat(i)+ Topics(i)       ' Format a line
      PRINT # fnum2, t                                            ' Write it
   NEXT i

   '----- Write the Topic Kwds out
   FOR i = 0 TO 750                                               ' Dump Topic Keywords
      IF ISNULL(TopicsKwds(i)) THEN ITERATE FOR                   ' Ignore null entries
      t = "K" + FORMAT$(i, "000") + TopicsKwds(i)                 ' Format a line
      t = CLIP$(RIGHT, t, 1)                                      ' Remove trailing comma
      PRINT # fnum2, t                                            ' Write it
   NEXT i

   '----- Now do the full Help text search
   helpDir = "D:\Documents\SPFLite3\Resource File\Text\"          ' Folder holding the exported text
   t = DIR$(helpDir + "*.txt")                                    ' Get first entry
   WHILE LEN(t)                                                   ' While we got something
      INCR FilesCtr                                               ' Bump count
      Files(FilesCtr) = helpDir + t                               ' Save answer
      t = DIR$(NEXT)                                              ' Try for another
   WEND                                                           '

   '----- Now process each file
   nTopics = UBOUND(Topics()) - LBOUND(Topics()) + 1              ' Scan the WHOLE table, last slot included
   FOR k = 1 TO FilesCtr                                          ' Loop through the folder
      ERRCLEAR                                                    '
      fnum1 = FREEFILE                                            ' Get fnum
      OPEN Files(k) FOR INPUT ACCESS READ AS # fnum1              ' Open it up
      IF ERR THEN                                                 ' Open failed?
         MSGBOX "Can't open: " + Files(k)                         ' Report it
         ITERATE FOR                                              ' And move on to the next file
      END IF                                                      '
      lctr = 0                                                    ' Reset line count
      DO WHILE ISFALSE EOF(fnum1)                                 ' If not EOF
         LINE INPUT# fnum1, ltxt                                  ' Get a record
         INCR lctr                                                ' Count it

         '----- 1st line is the Topic Title
         IF lctr = 1 THEN                                         ' First line?
            t = TRIM$(ltxt)                                       ' Trim just in case
            i = 0                                                 ' Assume not found
            IF LEN(t) THEN ARRAY SCAN Topics() FOR nTopics, =t, TO i ' Find it (an empty title would match any unused slot)
            IF i = 0 THEN                                         '
               MSGBOX "Can't find: " + t                          ' Report error
               EXIT DO                                            ' No topic to file words under, skip this file
            END IF                                                '
            tTopicID = i - 1                                      ' Adjust for 0-based table
            ITERATE DO                                            ' Done with this line
         END IF                                                   '

         '----- First remove rubbish lines
         ' NOTE(review): the search string on the next line appears empty as received; it may
         ' have held a non-printable marker that was lost - confirm against the original file.
         IF INSTR(ltxt, "") THEN ITERATE DO                     ' Eliminate rubbish
         IF INSTR(ltxt, "Deluca") THEN ITERATE DO                 '
         IF LEFT$(ltxt, 16) = "Created with the" THEN ITERATE DO  '

         '----- OK, now do the word by word routine
         i = PARSECOUNT(ltxt, " ")                                ' Get how many words
         IF i < 1 THEN ITERATE DO                                 ' Nothing to look at
         REDIM words(1 TO i)                                      ' Make room for every word on the line
         PARSE ltxt, words(), " "                                 ' Parse out the words
         FOR j = 1 TO i                                           ' Look at each
            t = words(j)                                          '
            RESET L1, L2
            IF VERIFY(t, "()[]<>") <> 0 THEN                      ' If just ( < [ ] > ) - leave alone
               t = REMOVE$(t, ANY $DQ + "~~!@#$%^&*()_-+={[}]:;'|\<,>.?/ ") ' Clean out punctuation
            END IF                                                '
            IF VERIFY(t, "0123456789") = 0 OR ISNULL(t) THEN ITERATE FOR  ' Ignore simple numbers & nulls
            IF LEN(t) = 2 AND VERIFY(t, "0123456789ABCDEF") = 0 THEN ITERATE FOR ' Ignore 2-char hex pairs
            IF UCASE$(t) = t THEN                                 ' If an uppercase word
               ARRAY SCAN MstrKwdsU() FOR MstrKwdsctrU, =t, TO L1 ' Find it (case-sensitive table)
            END IF                                                '
            IF L1 = 0 THEN                                        ' Not found in UCASE Table?
               ARRAY SCAN MstrKwds() FOR MstrKwdsctr, COLLATE UCASE, =t, TO L2 ' Find it
            END IF                                                '
            IF (L1 + L2) = 0 THEN ITERATE FOR                     ' Not found?  We ignore it
            Addword(FORMAT$(tTopicID, "000") + t)                 ' Add to our list
         NEXT j                                                   '
      LOOP                                                        ' Loop back till file all read
      CLOSE fnum1                                                 ' Close up the file
   NEXT k                                                         ' On to next file

   '----- Write the Words out
   ARRAY SORT tWords() FOR tWordCtr                               ' Sort it
   FOR i = 1 TO tWordCtr                                          ' Dump the table
      t = "W" + tWords(i)                                         '
      PRINT # fnum2, t                                            '
   NEXT i                                                         '
   ARRAY SORT uWords() FOR uWordCtr                               ' Sort it
   FOR i = 1 TO uWordCtr                                          ' Dump the table
      t = "U" + uWords(i)                                         '
      PRINT # fnum2, t                                            '
   NEXT i                                                         '

   '----- Shut it all down
   CLOSE fnum2                                                    '
   MSGBOX "All Done"

END FUNCTION

SUB AddTitleWords(BYVAL tTopicID AS LONG, BYVAL tTitle AS STRING, BYVAL delims AS STRING)
'----- Split tTitle on any character in delims and file every non-noise word
'      through AddWordU, prefixed with the 3-digit topic number.
'      Punctuation is stripped BEFORE the noise test so that "THE," is dropped
'      like "THE" and a word made only of punctuation is never filed as an
'      empty keyword.
LOCAL i, n AS LONG, t, tKey AS STRING
LOCAL w() AS STRING
   n = PARSECOUNT(tTitle, ANY delims)                             ' Get count of words in Title
   IF n < 1 THEN EXIT SUB                                         ' Nothing to do
   REDIM w(1 TO n)                                                ' Room for every word, however many
   PARSE tTitle, w(), ANY delims                                  ' Separate the Title
   tKey = FORMAT$(tTopicID, "000")                                ' Topic number prefix
   FOR i = 1 TO n                                                 ' Add each word as a KW
      t = REMOVE$(UCASE$(w(i)), ANY ",;/&()")                     ' Uppercase, clear extra punctuation
      IF t <> "AND" AND t <> "OF"      AND _                      ' Eliminate 'noise' words
         t <> "A"   AND t <> "THE"     AND _                      '
         t <> "AN"  AND t <> "SPFLITE" AND _                      '
         t <> "TO"  AND t <> "AS"      AND _                      '
         t <> "AT"  AND t <> ""        AND _                      '
         t <> "-"   AND t <> "WITH"    THEN                       '
         AddWordU(tKey + t)                                       ' Accumulate words from the Title
         IF RIGHT$(t, 1) = "$" AND LEN(t) > 1 THEN _              ' If an FM string function
            AddWordU(tKey + CLIP$(RIGHT, t, 1))                   ' Add it without the $ as well
      END IF                                                      '
   NEXT i                                                         '
END SUB

SUB AddWord(t AS STRING)
'----- Accumulate search words in tWords(), filter out duplicates
'      t is a 3-character topic number immediately followed by the keyword.
'      Each table entry is  KEYWORD + "\" + nnnccc [nnnccc ...]  where nnn is
'      a topic number and ccc its 3-digit hit count.
'      Uses and updates the globals tWords() and tWordCtr.
REGISTER i AS LONG
REGISTER j AS LONG
LOCAL tnum, tkwd1 AS STRING, k, b AS LONG
   IF LEN(t) < 4 THEN EXIT SUB                                    ' Ignore nulls and entries with no keyword
   tnum = LEFT$(t, 3)                                             ' Split off the Topic number
   tkwd1 = REMOVE$(UCASE$(MID$(t, 4)), "\")                       ' Keyword; "\" is reserved as the separator
   IF LEN(tkwd1) = 0 THEN EXIT SUB                                ' Nothing left to file

   FOR i = 1 TO tWordCtr                                          ' Search the table (skipped while empty)
      b = INSTR(tWords(i), "\")                                   ' Get break position
      IF LEFT$(tWords(i), b - 1) <> tkwd1 THEN ITERATE FOR        ' Not our keyword, keep looking
      FOR j = b + 1 TO LEN(tWords(i)) STEP 6                      ' See if tnum already there
         IF MID$(tWords(i), j, 3) = tnum THEN                     ' If so, bump the 'hit' count
            k = VAL(MID$(tWords(i), j + 3, 3))                    ' Get count
            IF k < 999 THEN INCR k                                ' Bump it, but stay within 3 digits
            MID$(tWords(i), j + 3, 3) = FORMAT$(k, "000")         ' Stuff it back in
            EXIT SUB                                              ' And we're all done
         END IF                                                   '
      NEXT j                                                      '
      tWords(i) += tnum + "001"                                   ' Topic not there, add it
      EXIT SUB                                                    ' and we're done
   NEXT i                                                         '

   INCR tWordCtr                                                  ' New keyword, take the next slot
   IF tWordCtr > UBOUND(tWords()) THEN _                          ' Enlarge table if needed
      REDIM PRESERVE tWords(1 TO 2 * tWordCtr) AS GLOBAL STRING   '
   tWords(tWordCtr) = tkwd1 + "\" + tnum + "001"                  ' Create entry kwd\nnn001
END SUB

SUB AddWordU(t AS STRING)
'----- Accumulate search words from the Titles in uWords(), filter out duplicates
'      t is a 3-character topic number immediately followed by the keyword.
'      Each table entry is  KEYWORD + "\" + nnnccc [nnnccc ...]  where nnn is
'      a topic number and ccc its 3-digit hit count.
'      Uses and updates the globals uWords() and uWordCtr.
REGISTER i AS LONG
REGISTER j AS LONG
LOCAL tnum, tkwd1 AS STRING, k, b AS LONG
   IF LEN(t) < 4 THEN EXIT SUB                                    ' Ignore nulls and entries with no keyword
   tnum = LEFT$(t, 3)                                             ' Split off the Topic number
   tkwd1 = REMOVE$(UCASE$(MID$(t, 4)), "\")                       ' Keyword; "\" is reserved as the separator
   IF LEN(tkwd1) = 0 THEN EXIT SUB                                ' Nothing left to file

   FOR i = 1 TO uWordCtr                                          ' Search the table (skipped while empty)
      b = INSTR(uWords(i), "\")                                   ' Get break position
      IF LEFT$(uWords(i), b - 1) <> tkwd1 THEN ITERATE FOR        ' Not our keyword, keep looking
      FOR j = b + 1 TO LEN(uWords(i)) STEP 6                      ' See if tnum already there
         IF MID$(uWords(i), j, 3) = tnum THEN                     ' If so, bump the 'hit' count
            k = VAL(MID$(uWords(i), j + 3, 3))                    ' Get count
            IF k < 999 THEN INCR k                                ' Bump it, but stay within 3 digits
            MID$(uWords(i), j + 3, 3) = FORMAT$(k, "000")         ' Stuff it back in
            EXIT SUB                                              ' And we're all done
         END IF                                                   '
      NEXT j                                                      '
      uWords(i) += tnum + "001"                                   ' Topic not there, add it
      EXIT SUB                                                    ' and we're done
   NEXT i                                                         '

   INCR uWordCtr                                                  ' New keyword, take the next slot
   IF uWordCtr > UBOUND(uWords()) THEN _                          ' Enlarge table if needed
      REDIM PRESERVE uWords(1 TO 2 * uWordCtr) AS GLOBAL STRING   '
   uWords(uWordCtr) = tkwd1 + "\" + tnum + "001"                  ' Create entry kwd\nnn001
END SUB

SUB      DEBUG (st AS STRING)
'---------- Print a line of text to a diagnostic console
'           st is the text to show; a CRLF is appended. The text is copied into
'           a 1024-byte ASCIIZ buffer, so anything too long to fit is cut off.
'           The console is allocated on the first call and its handle kept in
'           the STATIC Consl for later calls.
STATIC Consl AS LONG
LOCAL szConsole AS ASCIIZ * 1024
   '----- Allocate a console if we haven't already done so
   IF Consl = 0 THEN
      AllocConsole
      SetConsoleTitle "PB Diagnostic Console"
      Consl = GetStdHandle(%STD_OUTPUT_HANDLE)
      SetConsoleTextAttribute Consl, %FOREGROUND_RED OR _         ' Must target the console handle, not 0
                                     %FOREGROUND_GREEN OR _
                                     %FOREGROUND_BLUE
   END IF

   '----- print the line
   IF Consl > 0 THEN
      szConsole = LEFT$(st, SIZEOF(szConsole) - 3) & $CRLF        ' Leave room for CRLF + terminating NUL
      WriteConsole Consl, szConsole, LEN(szConsole), %NULL, %NULL
   END IF
END SUB
